Goal: we want to approximate the dominant eigenfunctions of the transfer operator underlying some observed dynamics.
To do this, we need to choose a good basis set. The variational principle of molecular kinetics lets us find the optimal linear combination of a given set of basis functions. In MSMs, the basis functions are indicator functions on a partitioning of configuration space; in tICA, they are arbitrary input features, usually internal coordinates such as dihedral angles.
Another approach to constructing a basis set is to measure the RMSD or weighted RMSD (wRMSD) from each frame to a set of reference structures. A major benefit of that approach is that the resulting model should be differentiable with respect to the parameters defining the basis functions: the locations of the references, and their associated per-atom weights and scale parameters.
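
To make that concrete, here is a rough sketch of a single Gaussian-of-wRMSD basis function. The helper below is hypothetical (not code from this notebook); it only illustrates which quantities play the role of tunable parameters.
import numpy as np
from MDAnalysis.analysis.rms import rmsd

def gaussian_wrmsd_basis_function(frame_xyz, ref_xyz, atom_weights, alpha):
    '''phi(x) = exp(-wRMSD(x, ref)^2 / alpha).

    The tunable parameters are the reference coordinates ref_xyz, the per-atom
    atom_weights, and the scale alpha -- each enters the expression smoothly.
    '''
    d = rmsd(frame_xyz, ref_xyz, weights=atom_weights, center=True)
    return np.exp(-d ** 2 / alpha)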
In [27]:
    
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
%matplotlib inline
import mdtraj
plt.rc('font', family='serif')
    
In [28]:
    
# fetch example data
from msmbuilder.example_datasets import FsPeptide
dataset = FsPeptide().get()
fs_trajectories = dataset.trajectories
fs_t = fs_trajectories[0]
    
    
In [29]:
    
# 1. internal coordinate basis sets
from msmbuilder.featurizer import DihedralFeaturizer
basis_sets = dict()

# backbone phi/psi dihedrals only (the featurizer's default)
dih_model = DihedralFeaturizer()
X = dih_model.fit_transform(fs_trajectories)
basis_sets['dihedral_phi_psi'] = X

# backbone + sidechain dihedrals
dih_model = DihedralFeaturizer(types=['phi', 'psi', 'omega', 'chi1', 'chi2', 'chi3', 'chi4'])
X = dih_model.fit_transform(fs_trajectories)
basis_sets['dihedral_all'] = X
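
A quick shape check keeps the basis-set sizes straight for later comparisons; if I recall the msmbuilder defaults correctly, DihedralFeaturizer emits a sin/cos pair per angle, so each dihedral contributes two columns.
# each per-trajectory feature matrix is (n_frames, n_basis_functions)
print(basis_sets['dihedral_phi_psi'][0].shape)
print(basis_sets['dihedral_all'][0].shape)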
    
In [7]:
    
# 2. RMSD to regularly spaced references
# take every 4000th frame of each trajectory as a reference structure
references = [x[::4000] for x in fs_trajectories]
refs = references[0]
for ref in references[1:]:
    refs = refs + ref  # mdtraj Trajectory concatenation
# oh, this is already built into MSMBuilder:
#def compute_rmsds_to_refs(trajectories,refs):
#    basis_exps = []
#    for traj in trajectories:
#        rmsd_to_refs = np.zeros((len(traj),len(refs)))
#        for i in range(len(refs)):
#            rmsd_to_refs[:,i] = mdtraj.rmsd(traj,refs,i)
#        basis_exps.append(rmsd_to_refs)
#    return basis_exps
    
#basis_sets['rmsd_reg'] = compute_rmsds_to_refs(fs_trajectories,refs)
from msmbuilder.featurizer import RMSDFeaturizer
rmsdf = RMSDFeaturizer(refs)
basis_sets['rmsd_reg'] = rmsdf.fit_transform(fs_trajectories)
print(len(refs))
    
    
In [8]:
    
# 3. RMSD to cluster-center refs
# pick cluster centers by k-medoids on the all-dihedral featurization (X)
from msmbuilder.cluster import MiniBatchKMedoids
kmed = MiniBatchKMedoids(n_clusters=50, batch_size=200)
kmed.fit(X)
    
    Out[8]:
In [9]:
    
# extract exemplar configurations
clever_refs = []
for ind in kmed.cluster_ids_:
    clever_refs.append(fs_trajectories[ind[0]][ind[1]])
# convert list of length-1 mdtraj Trajectories to a single trajectory
# -- currently doing this in the most inefficient way possible, but it's 
# a tiny list so it doesn't matter
clever_ref = clever_refs[0]
for i in range(1,len(clever_refs)):
    clever_ref = clever_ref + clever_refs[i]
clever_refs = clever_ref
print(len(clever_refs))
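
If I remember the mdtraj API correctly, the whole concatenation above can also be done in one call (worth double-checking against the installed mdtraj version):
# alternative: join the 50 single-frame Trajectories in a single call
clever_refs_alt = mdtraj.join([fs_trajectories[i][j] for (i, j) in kmed.cluster_ids_])
print(len(clever_refs_alt))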
    
    
In [10]:
    
rmsdf = RMSDFeaturizer(clever_refs)
basis_sets['rmsd_kmed'] = rmsdf.fit_transform(fs_trajectories)
    
In [11]:
    
rmsd_kmed_basis = np.array(basis_sets['rmsd_kmed'])
    
In [26]:
    
# 4. wRMSD to the regularly spaced references
from MDAnalysis.analysis.rms import rmsd as wRMSD
# compute per-atom weights from mean atomwise deviations (precomputed at lag tau=20)
atomwise_deviations = np.load('fs_atomwise_deviations_tau=20.npy')
mean = atomwise_deviations.mean(0)
weights = np.exp(-mean/0.065)
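
A quick sanity check on the weighting scheme: atoms with small mean deviation over the lag get weights near 1, while highly mobile atoms are exponentially down-weighted.
# ranges of the mean atomwise deviations and of the resulting weights
print(mean.min(), mean.max())
print(weights.min(), weights.max())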
    
In [84]:
    
def compute_wrmsd_traj(trajectories, refs, weights):
    '''Compute wRMSDs from each frame of each trajectory to each reference structure.'''
    basis_exps = []
    for traj in trajectories:
        wrmsd_to_refs = np.zeros((len(traj), len(refs)))
        # simple (but slow) double loop over frames and references
        for i in range(len(traj)):
            for j in range(len(refs)):
                wrmsd_to_refs[i, j] = wRMSD(refs.xyz[j], traj.xyz[i],
                                            weights=weights, center=True)
        basis_exps.append(wrmsd_to_refs)
    return basis_exps

weighted_trajs = compute_wrmsd_traj(fs_trajectories, refs, weights)
basis_sets['wrmsd_exp'] = weighted_trajs
    
In [77]:
    
#np.save('wrmsd_exp.npy',weighted_trajs)
#np.save('wrmsd_inv.npy',inv_mean_weighted_trajs)
#np.save('wrmsd_mean.npy',mean_weighted_trajs)
    
In [30]:
    
# loading from file
weighted_trajs = np.load('wrmsd_exp.npy')
weighted_trajs.shape # 28 trajectories, 10k frames / traj, 84 basis fxns
    
    Out[30]:
In [31]:
    
# grid search over the scale parameter alpha, using the same alpha for all basis functions
# (alpha*np.ones(84) broadcasts one scale over all 84 basis functions; a per-function
#  vector of scales could be dropped in here later)
alphas = np.logspace(-10, 10, base=np.e)  # 50 values, log-spaced between e^-10 and e^10
for i, alpha in enumerate(alphas):
    basis_sets['wrmsd_gauss_{0}'.format(i)] = np.exp(-(weighted_trajs)**2 / (alpha*np.ones(84)))
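
As an alternative starting point to a blind grid search, something like the median heuristic from the kernel-methods literature could give a data-driven guess for alpha; this is a sketch added here, not part of the original analysis.
# median heuristic: set alpha to the median squared wRMSD, so a typical
# basis function evaluates to exp(-1) rather than saturating near 0 or 1
alpha_median = np.median(np.concatenate([np.ravel(w) for w in weighted_trajs]) ** 2)
print(alpha_median)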
    
In [43]:
    
rmsd_basis = np.array(basis_sets['rmsd_reg'])
    
In [44]:
    
for i,alpha in enumerate(alphas):
    basis_sets['rmsd_gauss_{0}'.format(i)] = np.exp(-(rmsd_basis)**2 / (alpha*np.ones(84)))
    
In [14]:
    
for i,alpha in enumerate(alphas):
    basis_sets['rmsd_kmed_gauss_{0}'.format(i)] = np.exp(-(rmsd_kmed_basis)**2 / (alpha*np.ones(50)))
    
In [24]:
    
basis_sets['rmsd_kmed_gauss_18']
    
    Out[24]:
In [37]:
    
alphas[18]
    
    Out[37]:
In [66]:
    
# 5. indicator functions
clusters = kmed.transform(X)
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(n_values=np.max(np.vstack(clusters))+1,
                    sparse=False)
cluster_enc = enc.fit_transform(clusters[0].reshape((len(clusters[0]),1)))
    
    
In [65]:
    
cluster_enc
    
    
In [163]:
    
from msmbuilder.msm import MarkovStateModel
msm = MarkovStateModel()
msm.fit(clusters)
msm.score_
    
    
    Out[163]:
In [165]:
    
plt.imshow(msm.transmat_,interpolation='none')
    
    Out[165]: [plot: heatmap of the MSM transition matrix]
In [15]:
    
from msmbuilder.decomposition import tICA
def score_via_tica(basis_set, m=10, lag_time=100):
    ''' Since the optimal matrix of expansion coefficients A is given by stacking
    the generalized eigenvectors of the time-lagged overlap and overlap matrices
    as columns, we can directly measure the quality of a candidate basis set for rank-m
    transfer operator approximation by performing tICA, then summing the top m eigenvalues.
    '''
    tica = tICA(lag_time=lag_time)
    tica.fit(basis_set)
    return np.sum(tica.eigenvalues_[:m])

def score_via_gmrq(basis_set, expansion_coeffs=None, m=10, lag_time=100):
    ### WARNING: this is buggy-- this *should* give the same answer as tICA
    # if you pass it the same basis_set and don't specify expansion_coeffs,
    # but the answer's off by ~10% on examples. A likely culprit: tICA mean-centers
    # the data and symmetrizes the time-lagged overlap matrix, which this doesn't.
    # I'm also not handling singular matrices correctly, I think.
    tau = lag_time

    # compute the overlap and time-lagged overlap matrices
    S = basis_set.T.dot(basis_set) / len(basis_set)
    C = basis_set[tau:].T.dot(basis_set[:-tau]) / (len(basis_set) - tau)

    # expansion coefficients
    if expansion_coeffs is None:
        import scipy.linalg as la
        eig_vals, eig_vecs = la.eig(C, S)
        # discard imaginary components, then sort by eigenvalue (descending)
        eig_vals = eig_vals.real
        eig_vecs = eig_vecs.real
        order = np.argsort(eig_vals)[::-1]
        A = eig_vecs[:, order[:m]]
        #A = np.diag(np.ones(basis_set.shape[1]))
    else:
        A = expansion_coeffs

    # compute the GMRQ from these overlap matrices
    P = A.T.dot(C).dot(A)
    Q = A.T.dot(S).dot(A)
    try:
        Q_inv = np.linalg.inv(Q)
    except np.linalg.LinAlgError:
        # Q could be singular
        Q_inv = np.linalg.pinv(Q)
    P_Q_inv = P.dot(Q_inv)
    diag = np.sort(np.diag(P_Q_inv))[::-1]
    partial_trace = diag[:m]
    return P_Q_inv, np.sum(partial_trace)
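
One way to test the mean-centering/symmetrization hypothesis (a sketch added for comparison; the function below and its estimator choices are mine, not part of the original scoring code) is to compute the GMRQ from mean-centered, symmetrized overlap matrices and see whether the gap to tICA shrinks.
import scipy.linalg as la

def score_via_gmrq_symmetrized(basis_set, m=10, lag_time=100):
    '''GMRQ of one trajectory's basis-function time series, using mean-centered and
    symmetrized overlap matrices (closer to msmbuilder's tICA estimator).'''
    tau = lag_time
    X_c = basis_set - basis_set.mean(0)  # mean-center, as tICA does
    n = len(X_c) - tau
    # symmetrized instantaneous and time-lagged overlap matrices
    S = (X_c[:-tau].T.dot(X_c[:-tau]) + X_c[tau:].T.dot(X_c[tau:])) / (2.0 * n)
    C = (X_c[tau:].T.dot(X_c[:-tau]) + X_c[:-tau].T.dot(X_c[tau:])) / (2.0 * n)
    # for a symmetric definite pencil the generalized eigenvalues are real, and the
    # GMRQ with optimal expansion coefficients equals the sum of the top m of them
    eig_vals = la.eigvalsh(C, S)
    return np.sort(eig_vals)[::-1][:m].sum()

# compare on the wRMSD basis loaded earlier
print(score_via_gmrq_symmetrized(weighted_trajs[0]), score_via_tica([weighted_trajs[0]]))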
    
In [16]:
    
score = score_via_tica
    
In [156]:
    
# actually, I might need to make a small modification to GMRQ to accept
# direct eigenfunction approximations that don't require solving for expansion
# coefficients (i.e., just use the identity matrix as A)
    
In [157]:
    
P_Q_inv,gmrq = score_via_gmrq(weighted_trajs[0])
P_Q_inv.shape,gmrq
    
    
    Out[157]:
In [159]:
    
# these should be equal; the fact that they aren't indicates a problem
gmrq,score_via_tica([weighted_trajs[0]])
    
    Out[159]:
In [49]:
    
X[0].shape
    
    Out[49]:
In [ ]:
    
scores = dict()
    
In [ ]:
    
clusters = kmed.fit_transform(basis_sets['
    
In [45]:
    
m = 20
lag_time = 100
for name in basis_sets:
    if name not in scores:
        scores[name] = score(basis_sets[name], m=m, lag_time=lag_time)
    # also score a renormalized version, where each frame's feature vector is rescaled to sum to 1
    name_ = name + '_renorm'
    if name_ not in scores:
        try:
            renorm = [(traj.T / traj.T.sum(0)).T for traj in basis_sets[name]]
            scores[name_] = score(renorm, m=m, lag_time=lag_time)
        except:
            # skip basis sets where renormalization or scoring fails
            pass
    
    
In [51]:
    
renorm = [(traj.T / traj.T.sum(0)).T for traj in basis_sets['wrmsd_gauss_16']]
    
In [55]:
    
from sklearn.decomposition import PCA
pca = PCA()
pca.fit(np.vstack(renorm))
# note: this overwrites X, which previously held the all-dihedral featurization
X = pca.transform(np.vstack(renorm))
    
In [56]:
    
plt.plot(pca.explained_variance_ratio_)
plt.figure()
plt.plot(np.cumsum(pca.explained_variance_ratio_))
    
    Out[56]: [plots: per-component and cumulative explained variance ratios]
In [58]:
    
plt.scatter(X[:,0],X[:,1],s=1,c=range(len(X)),linewidths=0)
    
    Out[58]: [plot: frames projected onto the first two principal components, colored by frame index]
In [64]:
    
for i in range(5):
    plt.bar(range(len(pca.components_[i])),pca.components_[i])
    plt.title('Component {0}'.format(i+1))
    plt.figure()
    
In [49]:
    
sorted(scores.items(),key=lambda item:-item[1])
    
    Out[49]:
In [46]:
    
sorted(scores.items(),key=lambda item:-item[1])
    
    Out[46]:
In [89]:
    
# 'archive'
scores.items()
    
    Out[89]:
In [ ]:
    
    
Other things to try: